Class 6 Multilevel Model

# data manipulation
library(tidyverse)
library(labelled)

# plotting 
library(ggdist)
library(gghalves)  
library(ggbeeswarm)
library(patchwork)

# estimation
library(lme4)
library(fixest)


# model summaries and predictions 
library(ggeffects)
library(modelsummary)

Data Import

(you don’t actually need to run this)

library(WDI)
library(tidyverse)
library(countrycode)

indicators <- list('gini' = 'SI.POV.GINI',
                'wlfp' = 'SL.TLF.ACTI.ZS'  ,# Labor Force Participation Rate (%), Female
                'gdp_pc_ppp' = "NY.GDP.PCAP.PP.KD",# "GDP per capita, PPP (constant 2005 international $)"
                'resource_rents' = 'NY.GDP.TOTL.RT.ZS'
                )  

wdi <- WDI(indicator=indicators,
          start = 2020,
          end = 2023
          )
wdi$country_id<- countrycode(wdi$iso3c, origin='iso3c', destination ='vdem')
wdi_filled<-wdi|>
  group_by(country)|>
  arrange(year)|>
  fill(c(resource_rents, gini, wlfp), .direction='downup')|>
  slice_tail(n=1)


polregions<-c('Eastern Europe', #  (including German Democratic Republic, excluding the Caucasus)
              'Latin America and the Caribbean',
              'The Middle East and North Africa', # (including Israel and Türkiye, excluding Cyprus)
              'Sub-Saharan Africa',
              'Western Europe and North America', #  (including Cyprus, but excluding German Democratic Republic)
              'East Asia and the Pacific',
              'South and Central Asia' #(including the Caucasus)
              
              )

# Electoral system data 
elsystem<-vdemdata::vdem|>
  arrange(year)|>
  filter(year>=2010)|>
  group_by(country_name)|>
  fill(v2elparlel, .direction='down')|>
  slice_tail(n=1)|>
  select(country_name,v2elparlel)
  

vdem<-vdemdata::vdem|>
   arrange(year)|>
  filter(v2x_elecreg == 1)|>
  filter(year>=2020)|>
  group_by(country_name)|>
  select(starts_with('country'), v2lgfemleg, v2x_elecreg, v2lgqugen, v2lgqugent, e_regionpol_7C,
         e_fh_pr, e_fh_cl,e_fh_status)|>
  
  fill(everything(),.direction='down')|>
  slice_tail(n=1)|>
  ungroup()|>
  left_join(wdi_filled, by=join_by(country_id == country_id))|>
  select(-starts_with("iso"), -country_id, -country, -year)|>
  left_join(elsystem, by='country_name')|>
  mutate(region = factor(e_regionpol_7C, labels=polregions),
         log_gdp_pc_ppp = log(gdp_pc_ppp),
         fh_status = factor(e_fh_status, levels=c(1,2, 3), labels=c('Free', "Partly Free", "Not Free")),
         fh_status = fct_relevel(fh_status, "Not Free", "Partly Free", "Free"),
         free_dummy = factor(fh_status == "Free", labels=c("Not Free", "Free")),
         majoritarian_dummy = factor(v2elparlel ==0, labels=c("Not Majoritarian", "Majoritarian"))
         )

vdem<-set_variable_labels(vdem, 
                    'v2lgfemleg' = "Lower chamber % female legislators", 
                    'v2x_elecreg' = "", 
                    'v2lgqugen' = 'Lower chamber gender quota', 
                    'v2lgqugent' = 'Lower chamber gender quota threshold', 
                    'resource_rents' = 'Total natural resources rents (% of GDP)',
                    'gini'= 'Gini coefficient', 
                    'wlfp' = "Female labor force participation",
                    'log_gdp_pc_ppp' = 'Logged GDP per capita, PPP',
                    'region' ='Region (politico-geographic 7-category)',
                    'majoritarian_dummy' = 'Lower chamber electoral system'
                    )


vdem_cleaned<-vdem|>
  drop_na(v2lgfemleg, v2lgqugen , wlfp , resource_rents , log_gdp_pc_ppp, e_fh_pr, e_fh_cl)

#saveRDS(vdem_cleaned,file='vdem_cleaned.rds')
if(!file.exists('vdem_clean.rds')){
  download.file('https://github.com/Neilblund/GVPT728_Winter24/raw/refs/heads/main/Additional%20Code%20and%20Data/vdem_cleaned.rds',
                'vdem_clean.rds',
                mode='wb')

}

vdem_cleaned<-readRDS('vdem_clean.rds')

Describing the main relationship

In general, do countries with majoritarian electoral systems elect fewer women?

relationship <- ggplot(vdem_cleaned, aes(x = majoritarian_dummy, y = v2lgfemleg)) +
  geom_half_point(aes(color =  majoritarian_dummy), 
                  transformation = position_quasirandom(width = 0.1),
                  side = "l", size = 1, alpha = 0.5) +
  geom_half_boxplot(aes(fill = majoritarian_dummy), side = "r") + 
  
  guides(color = "none", fill = "none") +
  labs(x = "Lower House Electoral System", y = "% women in legislature") +
  theme_bw() +
  scale_fill_brewer(palette='Dark2') +
  scale_color_brewer(palette='Dark2')


relationship

However: might want to control for some additional factors that could influence this relationship.

  • Women’s Labor Force Participation

  • logged GDP

  • Whether a country has quotas guaranteeing a certain number of seats for women in the legislature.

data<-vdem_cleaned|>

  select(v2lgfemleg, wlfp, log_gdp_pc_ppp, v2lgqugen, majoritarian_dummy)

GGally::ggpairs(data, aes(color=majoritarian_dummy)) +
  theme_bw() +
  scale_color_brewer(palette='Dark2')  +
    scale_fill_brewer(palette='Dark2') 

Using OLS

model_0<-lm(v2lgfemleg ~ 
               majoritarian_dummy +
               wlfp + 
               log_gdp_pc_ppp +
               v2lgqugen,
               data=vdem_cleaned)



modelsummary(list("OLS" = model_0), 
              coef_rename=TRUE,
             coef_omit = "Intercept",
              estimate  = "{estimate}",  
             statistic = c("conf.int"),
             conf_level = .95,        
 note = "95% CI in brackets",
 gof_omit = 'F|RMSE|R2$|AIC|Log.Lik.',
             )
tinytable_gdge5n3v141qhuuhxiy8
OLS
95% CI in brackets
Lower chamber electoral system [Majoritarian] -6.195
[-10.102, -2.289]
Female labor force participation 0.304
[0.132, 0.477]
Logged GDP per capita, PPP 1.316
[-0.288, 2.921]
Lower chamber gender quota 2.332
[1.140, 3.525]
Num.Obs. 157
R2 Adj. 0.229
BIC 1219.2

The effect of majoritarianism (relative to non-majoritarian electoral systems) is a estimated to be around a 6% decrease in the % women in the legislature. The presence of quotas, unsurprisingly, also has some impact, as does female labor force participation. While GDP may also play a role, the 95% confidence interval contains zero and is not statistically significant at conventional levels.

Fixed Effects

We might be concerned that this model fails to capture important variation. For instance: maybe cultural norms, colonial legacies, or simple geography play a major role in determining the outcome of interest?

ggplot(vdem_cleaned, aes(x=wlfp, y=v2lgfemleg)) + 
  geom_point(aes(color=majoritarian_dummy)) +
  theme_bw() +
  scale_color_brewer(palette='Dark2')  +
  scale_fill_brewer(palette='Dark2')  +
  labs(x="Women's Labor Force Participation",
       y="% women in the legislature",
       color = 'Electoral System'
       ) +
  facet_wrap(~region)

While we don’t have direct measures of a lot of these characteristics, we might try to account for them by including k-1 region-level dummy variables in the model. This would be the a fixed effects model:

model_1<-feols(v2lgfemleg ~ 
               majoritarian_dummy +
               wlfp + 
               log_gdp_pc_ppp +
               v2lgqugen | region,
               data=vdem_cleaned, 
               cluster ~ region)

model_1
OLS estimation, Dep. Var.: v2lgfemleg
Observations: 157
Fixed-effects: region: 7
Standard-errors: Clustered (region) 
                                Estimate Std. Error  t value  Pr(>|t|)    
majoritarian_dummyMajoritarian -5.449041   2.179653 -2.49996 0.0465309 *  
wlfp                            0.195182   0.082976  2.35227 0.0568819 .  
log_gdp_pc_ppp                  1.735662   1.109650  1.56415 0.1688154    
v2lgqugen                       2.129287   0.529949  4.01791 0.0069754 ** 
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
RMSE: 9.90766     Adj. R2: 0.307379
                Within R2: 0.175529
modelsummary(list("OLS" = model_0, "Region FE" = model_1), 
             coef_rename=TRUE,
             coef_omit = "Intercept",
             estimate  = "{estimate}",  
             statistic = c("conf.int"),
             conf_level = .95,        
             note = "95% CI in brackets",
             gof_omit = 'F|RMSE|R2$|AIC|Log.Lik.',
             )
tinytable_1dm4zj63tk4vfwszt1dm
OLS Region FE
95% CI in brackets
Lower chamber electoral system [Majoritarian] -6.195 -5.449
[-10.102, -2.289] [-10.782, -0.116]
Female labor force participation 0.304 0.195
[0.132, 0.477] [-0.008, 0.398]
Logged GDP per capita, PPP 1.316 1.736
[-0.288, 2.921] [-0.980, 4.451]
Lower chamber gender quota 2.332 2.129
[1.140, 3.525] [0.833, 3.426]
Num.Obs. 157 157
R2 Adj. 0.229 0.307
R2 Within 0.176
R2 Within Adj. 0.153
BIC 1219.2 1221.3
Std.Errors by: region

Notably, the inclusion of fixed effects here doesn’t drastically alter our estimates, and we still find that majoritarian systems have about the same expected negative effect on the dependent variable that they did in our original linear model.

While the fixed effects terms are automatically left out of the model output here, we can view them by running:

fixef(model_1)
$region
       East Asia and the Pacific                   Eastern Europe 
                      -9.1597424                       -6.6165617 
 Latin America and the Caribbean           South and Central Asia 
                      -1.8319281                       -7.5443107 
              Sub-Saharan Africa The Middle East and North Africa 
                      -2.8207371                      -13.3610522 
Western Europe and North America 
                       0.9046372 

attr(,"class")
[1] "fixest.fixef" "list"        
attr(,"exponential")
[1] FALSE

We can think of these as the separate Y-intercept terms for each region in our data set.

fixef_slopes<-predict_response(model_1, terms=c('majoritarian_dummy','region'))|>
  plot(ci=FALSE, connect_lines=TRUE, show_data=TRUE, jitter=TRUE) +
  ggtitle("Fixed Effects") +
  scale_color_brewer(palette='Dark2')

fixef_slopes

Multilevel Model

While both the standard OLS model and the fixed effects model seem to work reasonably well here, there may be cases where a multilevel model is more appropriate. This could be for several reasons. For instance:

  • We might think that the fixed effect for some small regions are implausibly low or high. After all: if a region only has a handful of countries, we might find that region has a really high or really low value of the dependent variable by random chance. Since random effects pool estimates across levels of the random effect, a random effects model may be a better fit.

  • We might want to investigate a characteristic that doesn’t vary much or at all within regions. For instance: we might have a variable for colonial legacy, primary religion, or membership in a regional organization. Since some of these will vary very little within each region, these controls would be perfectly or near-perfectly co-linear with the fixed effects estimator.

To run a linear random effects model, we’ll use the lmer function and include a random effect term. The lme4 package provides a whole bunch of ways to specify complex hierarchical models, but if we have a single random effect term, this is pretty straightforward: we just add something like:

+ (1|random_effect_term)

…to our formula. So here’s how I can create a region-level random effect:

model_2<-lmer(v2lgfemleg ~ 
               majoritarian_dummy +
               wlfp + 
               log_gdp_pc_ppp +
               v2lgqugen +
               (1|region), data=vdem_cleaned)

Now we can summarize our results in a single table:

model_list<-list("OLS" = model_0, "Region FE" = model_1, "Region RE" =model_2)

modelsummary(model_list, 
             coef_rename=TRUE,
             coef_omit = "Intercept",
             estimate  = "{estimate}",  
             statistic = c("conf.int"),
             conf_level = .95,        
             note = "95% CI in brackets",
             gof_omit = 'F|RMSE|R2$|AIC|Log.Lik.|R2 Marg|R2 Cond|R2 Within',
             )
tinytable_sapr58kghmpluaz20mhk
OLS Region FE Region RE
95% CI in brackets
Lower chamber electoral system [Majoritarian] -6.195 -5.449 -5.746
[-10.102, -2.289] [-10.782, -0.116] [-9.559, -1.933]
Female labor force participation 0.304 0.195 0.229
[0.132, 0.477] [-0.008, 0.398] [0.056, 0.402]
Logged GDP per capita, PPP 1.316 1.736 1.716
[-0.288, 2.921] [-0.980, 4.451] [-0.342, 3.774]
Lower chamber gender quota 2.332 2.129 2.166
[1.140, 3.525] [0.833, 3.426] [1.013, 3.319]
SD (Observations) 10.273
Num.Obs. 157 157 157
R2 Adj. 0.229 0.307
BIC 1219.2 1221.3 1212.3
ICC 0.1
Std.Errors by: region

And/Or plot the coefficients

modelplot(model_list,
          coef_rename=TRUE,
          coef_omit = c(1)
          ) +
  scale_color_brewer(palette='Dark2') +
  geom_vline(xintercept=0, lty=2)

In contrast to the fixed effects model, which allows each region to have a totally separate Y-intercept, the random effects model “pools” the estimated intercepts across each group and shrinks them toward the global mean. The result is that the differences in the intercept terms are less dramatic, especially for smaller groups where there’s less data.

raneff_slopes<-predict_response(model_2, terms=c('majoritarian_dummy','region'), margin='empirical')|>
  plot( ci=FALSE, show_data=TRUE, connect_lines=TRUE, jitter=TRUE) +
  ggtitle("Region Random Effects") +
  scale_color_brewer(palette='Dark2')


fixef_slopes / raneff_slopes +
    plot_layout(guides = "collect")